Data processing

# Loading all packages
library(Seurat); library(hdf5r); library(tidyverse)
## Loading required package: SeuratObject
## Loading required package: sp
## 'SeuratObject' was built under R 4.4.0 but the current version is
## 4.4.1; it is recomended that you reinstall 'SeuratObject' as the ABI
## for R may have changed
## 'SeuratObject' was built with package 'Matrix' 1.7.0 but the current
## version is 1.7.1; it is recomended that you reinstall 'SeuratObject' as
## the ABI for 'Matrix' may have changed
## 
## Attaching package: 'SeuratObject'
## The following objects are masked from 'package:base':
## 
##     intersect, t
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ purrr::flatten_df() masks hdf5r::flatten_df()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load data
matrix = Read10X_h5("/Users/tranchau/Documents/GFP_2024/Data/S11_WER_GFP/filtered_feature_bc_matrix.h5")

# Create Seurat object
Sample <- CreateSeuratObject(matrix, project = "WER")
VlnPlot(Sample, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

# Quality control
Sample <- subset(Sample, subset = nFeature_RNA > 500 & nFeature_RNA < 8000 & nCount_RNA > 500 & nCount_RNA < 40000 )
VlnPlot(Sample, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

# Normalize data 
Sample <- NormalizeData(object = Sample, verbose = FALSE)

# Find highly variable genes
Sample <- FindVariableFeatures(object = Sample, selection.method = "vst", verbose = FALSE)

# Scale data
Sample <- ScaleData(Sample,  features = rownames(Sample))
## Centering and scaling data matrix
# Reduce the dimention of the data
Sample <- RunPCA(Sample, features = VariableFeatures(object = Sample), verbose = FALSE)
ElbowPlot(Sample)

# Clustering
Sample <- FindNeighbors(Sample, reduction = "pca", dims = 1:30) 
## Computing nearest neighbor graph
## Computing SNN
Sample <- FindClusters(Sample, resolution = 0.5) 
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8572
## Number of edges: 289043
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9109
## Number of communities: 17
## Elapsed time: 0 seconds
# Plot UMAP
Sample <- RunUMAP(Sample, reduction = "pca", dims = 1:30) 
## 15:52:03 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:52:03 Read 8572 rows and found 30 numeric columns
## 15:52:03 Using Annoy for neighbor search, n_neighbors = 30
## 15:52:03 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:52:04 Writing NN index file to temp file /var/folders/bc/jp_ttqgx5mjdbhjxqr1w7p4w0000gn/T//Rtmp7PWts9/file1686789415bf
## 15:52:04 Searching Annoy index using 1 thread, search_k = 3000
## 15:52:05 Annoy recall = 100%
## 15:52:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 15:52:06 Initializing from normalized Laplacian + noise (using RSpectra)
## 15:52:06 Commencing optimization for 500 epochs, with 370538 positive edges
## 15:52:15 Optimization finished
DimPlot(Sample, reduction = "umap", label = TRUE)

#saveRDS(Sample, "/Users/tranchau/Documents/GFP_2024/Data/S11_WER_GFP/Seurat_Obj_WER.rds")

Label for each cluster

Sample_rename <- RenameIdents(object = Sample, '0' = "MZ", '1' = "Cortex", '2' = "Stele", '3'= "Hair", '4'= "Rootcap", '5' = "Endodermis", '6' = "Rootcap", '7'= "MZ", '8' = "Nonhair", '9' = "Rootcap", '10'= "Phloem", '11' = "MZ", '12' = "MZ", '13' = "Xylem", '14' = "Rootcap", '15' = "Rootcap", '16' = "Stele")


# Plot the UMAP with cell type annotation
DimPlot(Sample_rename, reduction = "umap", label = TRUE) 

Feature plots

FeaturePlot(Sample, features = "GFP")

FeaturePlot(Sample, features = "gene:AT5G14750")

Dotplot

DotPlot(object = Sample_rename, features = c("gene:AT1G62510", "gene:AT5G53370", "gene:AT4G14010", "gene:AT1G44800", "gene:AT1G64660", "gene:AT2G46890", "gene:AT1G27740", "gene:AT4G34580",  "gene:AT1G62980", "gene:AT5G17520", "gene:AT3G52180",  "gene:AT3G11550", "gene:AT2G36100",  "gene:AT2G14900", "gene:AT5G45210", "gene:AT2G37260", "gene:AT5G40330", "gene:AT1G65310", "gene:AT1G79430", "gene:AT5G50720", "gene:AT1G68810",  "gene:AT4G35350", "gene:AT1G71930"), cols = "RdYlBu",  col.min= -2, col.max = 2, dot.scale = 4) + 
  theme(axis.text.x = element_text(size=8, angle = 90, hjust=1), 
        axis.text.y = element_text(size=10, angle = 0, hjust=1), 
        axis.title.y  = element_text(size=15, angle = 90, vjust=1),
        legend.title = element_text(size=10),
        legend.text = element_text(size=8)) + 
  xlab('Gene') +  
  ylab('Cell type')

DoHeatmap plot

# Find marker genes
Sample_rename_marker <- FindAllMarkers(Sample_rename, only.pos = TRUE, min.pct = 0.5, logfc.threshold = 0.5) %>% 
                                  group_by(cluster) %>% 
                                  arrange(cluster, desc(avg_log2FC))
## Calculating cluster MZ
## For a (much!) faster implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the presto package
## --------------------------------------------
## install.packages('devtools')
## devtools::install_github('immunogenomics/presto')
## --------------------------------------------
## After installation of presto, Seurat will automatically use the more 
## efficient implementation (no further action necessary).
## This message will be shown once per session
## Calculating cluster Cortex
## Calculating cluster Stele
## Calculating cluster Hair
## Calculating cluster Rootcap
## Calculating cluster Endodermis
## Calculating cluster Nonhair
## Calculating cluster Phloem
## Calculating cluster Xylem
DoHeatmap(Sample_rename, features = Sample_rename_marker$gene, angle = 120) + NoLegend() + scale_fill_gradientn(colors = c("tomato", "white", "darkgreen"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.